clear
set more off

*--------------------*
* Preliminary steps
*--------------------*

* Load the raw PSID individual/family extract
use "$PSID/RawData/PSID_raw_indfam.dta", clear

* Unique identifier built from famid and personnumber.
* Stored as long rather than the default float: a float only represents
* integers exactly up to 16,777,216, so an integer storage type keeps the
* merge key exact regardless of how large famid gets.
    gen long father_id = famid*1000 + personnumber
    order father_id, before(famid)
    label var father_id "Father 1968 ID"

* Attach the number-of-children information. Rows that exist only in the
* using file are discarded (keep(master match)) and no _merge variable is
* left behind (nogenerate).
    merge 1:1 father_id using "$PSID/RawData/number_children_info.dta", keep(master match) nogenerate

/*  Sample restriction, following the Mazumder paper.
    Only certain parts of the PSID are retained:
      - SRC: the main nationally representative sample (famid below 3000);
      - SEO: 5000<famid<7000, used in robustness checks.
    Excluded: the immigrant samples (values between 3000 and 5000) and the
    Latino families added later (values >7000).

    Source: https://psidonline.isr.umich.edu/guide/faq.aspx   */
    gen src_sample = (famid < 3000)
    gen seo_sample = (7000 > famid) & (famid > 5000)

    * Drop every observation that belongs to neither retained sample
    drop if src_sample != 1 & seo_sample != 1
    
    * Indicator for being a father in 1968: head==1 together with a positive,
    * non-missing count in at least one of the three 1968 number-of-children
    * variables (away, inst, FU).
    gen father_1968 = 0
    foreach kind in away inst FU {
        replace father_1968 = 1 if head==1 & number_children_`kind'_1968>0 & !missing(number_children_`kind'_1968)
    }
    
* Flag fathers that appear in the downloaded PSID FIMS extract.
* Observations found only in the FIMS file are not kept.
    merge 1:1 father_id using "$PSID/FIMS/FIMSFathers_SRC_SEO.dta", keep(master match)
    gen father_fims = (_merge == 3)
    drop _merge

    * A "father" is flagged as a 1968 father AND matched in the FIMS file
    gen father = (father_1968 == 1) & (father_fims == 1)

* Restrict to male respondents (sex==1) aged 30 to 50, inclusive, in 1968
    keep if sex == 1 & inrange(age1968, 30, 50)
    
* Flag fathers who appear in each father-son IGE analysis file
* (the 1-, 5- and 10-year versions).
    foreach yrs in 1 5 10 {

        * Step 1: build a list with one row per father_id found in the
        * analysis file, without disturbing the data in memory.
        preserve
        use father_id using "$PSID/output/PSIDFatherSONS_IGEanalysis_`yrs'yrs.dta", clear
        duplicates drop
        tempfile ids`yrs'
        save `ids`yrs''
        restore

        * Step 2: match that list to the current sample. IDs present only
        * in the analysis file are not kept; the flag is 1 for a father
        * (father==1) who was matched.
        merge 1:1 father_id using `ids`yrs'', keep(master match)
        gen father_son30to50_father`yrs'income = (father == 1) & (_merge == 3)
        drop _merge
    }

*------------*

* Rename variables for table
    rename grade1968 grade
    rename indweight1968 weight
    rename totfaminc1968 faminc
    rename age1968 age
    rename race1968 race

    * Codes 0 ("inappropriate") and 98 or above are not valid grades
    replace grade = . if grade==0 | grade>=98

    * Education dummies. They are left missing when grade is missing, so that
    * respondents with unknown schooling drop out of the weighted means
    * instead of being counted as "not educated" (which would bias the
    * reported shares downward).
    gen hs_ed   = grade>=12 if !missing(grade)
    gen coll_ed = grade>=16 if !missing(grade)

    * Race dummy (race code 2), plus a quick weighted check of its mean
    gen black = race==2
    sum black [aw=weight]

    * Constant used to select the full sample in the table loops
    gen all = 1

    * Keep only what the summary table needs
    keep father_id weight faminc grade hs_ed coll_ed age black father_son30* father all father_1968

*------------------------------------------*
*------------------------------------------*

*--------------------------------*
* Table of summary statistics
*--------------------------------*

* 1. Set up table
*    Writes the LaTeX preamble of the table: a label column followed by six
*    centred columns, the last three grouped under "Father's Income".

    * If a previous run aborted before reaching "file close", the handle is
    * still open and "file open" would fail; close it first. capture makes
    * this a no-op when the handle is not open.
    capture file close Table_SumStats
    file open Table_SumStats using "$Mydirectory2/appendix_a/PSID_summarystats_1968men.tex", write replace

    file write Table_SumStats "\begin{tabularx}{\hsize}{@{\hskip\tabcolsep\extracolsep\fill} l c c c c c c}" _n
    file write Table_SumStats "\toprule" _n
    file write Table_SumStats "\addlinespace[2ex]" _n
    file write Table_SumStats "& & & & \multicolumn{3}{c}{Father's Income}  \\" _n
    file write Table_SumStats "\addlinespace[1ex]" _n
    file write Table_SumStats "\cmidrule(lr){5-7} " _n
    file write Table_SumStats "\addlinespace[1ex]" _n
    file write Table_SumStats "& \shortstack{1968 \\ Men} & \shortstack{1968 \\ Fathers} & \shortstack{With Child \\ in Survey} & 1 year & 5 years & 10 years  \\" _n

    file write Table_SumStats "\addlinespace[1ex]" _n
    file write Table_SumStats "\midrule" _n
    
* 2. Father demographics
*    One table row per variable; each cell is the weighted mean (aweight =
*    weight) of that variable within one of the six sample groups.

    file write Table_SumStats "\addlinespace[2ex]" _n

    foreach var of varlist age black hs_ed coll_ed faminc {

        * Row label shown in the first column
        if "`var'"=="age"     local label1 "Age"
        if "`var'"=="black"   local label1 "Black"
        if "`var'"=="hs_ed"   local label1 "HS educated"
        if "`var'"=="coll_ed" local label1 "College edu."
        if "`var'"=="faminc"  local label1 "Family income"

        * Family income is printed without decimals, everything else with two
        local fmt "%-09.2fc"
        if "`var'"=="faminc" local fmt "%-09.0fc"

        file write Table_SumStats "\quad `label1' "

        foreach group in all father_1968 father father_son30to50_father1income father_son30to50_father5income father_son30to50_father10income {

            sum `var' if `group'==1 [aw=weight]

            * r(mean) is read directly rather than through macro expansion:
            * if the group has no usable observations the expansion could
            * be empty and abort the run with the output file still open.
            * Read as an expression it is simply missing and prints as ".".
            local cell : display `fmt' r(mean)

            file write Table_SumStats "& `cell' "
        }

        file write Table_SumStats " \\" _n

    }
    
* 3. Number of observations
*    Final row: unweighted count of observations in each of the six sample
*    groups, followed by the closing rules of the table.
    file write Table_SumStats "\addlinespace[2ex]" _n
    file write Table_SumStats "\midrule" _n
    file write Table_SumStats "\addlinespace[1ex]" _n

    file write Table_SumStats "Observations"

    local groups all father_1968 father father_son30to50_father1income father_son30to50_father5income father_son30to50_father10income
    foreach g of local groups {
        count if `g'==1
        local cell : display %-09.0fc r(N)
        file write Table_SumStats "& `cell' "
    }

    file write Table_SumStats " \\" _n

    * Close the tabular environment and release the file handle
    file write Table_SumStats "\addlinespace[1ex]" _n "\bottomrule" _n "\end{tabularx}" _n
    file close Table_SumStats
